This is the data we use to estimate the speaker optimality nad the age specific prior.
There are two types of objects, one used in the 3-object world of the RSA model (all_objects) and one used in the 2-object world of the prior (prior_objects).
rsaUtils <- '
var all_objects = [
{ shape: "triangle", id:1, location: 1},
{ shape: "triangle", id:2, location: 2},
{ shape: "circle", id:1, location: 2}
]
var prior_objects = [
{ shape: "triangle", id:1, location: 1},
{ shape: "circle", id:1, location: 2}
]
var labels = ["dax","wug"]
var lexicon1 = function(utterance, obj){
utterance.label == "dax" ? obj.shape == "triangle" :
utterance.label == "wug" ? obj.shape == "circle" :
true
}
var lexicon2 = function(utterance, obj){
utterance.label == "dax" ? obj.shape == "circle" :
utterance.label == "wug" ? obj.shape == "triangle" :
true
}
var lexiconObjects = {
"dax = triangle": {
triangle: "dax", circle: "wug"
},
"dax = circle": {
triangle: "wug", circle: "dax"
},
}
var lexiconObject = {
"dax = triangle": lexicon1,
"dax = circle" : lexicon2
}
var point = function(utterance, obj){
return obj.location == utterance.point
}
var utterancePrior = function(obj, lexiconName){
var locationsWithShape = _.map(_.filter(all_objects, {shape: obj.shape}), "location")
var point = uniformDraw(locationsWithShape)
var label = lexiconObjects[lexiconName][obj.shape]
return {label: label, point: point}
}
var LexiconPrior = Categorical({vs: ["dax = triangle","dax = circle" ], ps: [1, 1]})
'
rsaModel <- '
var literalListener = cache(function(utterance, priorProbs){
Infer({method: "enumerate", model: function(){
var lexiconName = sample(LexiconPrior);
var lexicon = lexiconObject[lexiconName];
var obj = sample( Categorical({vs: all_objects, ps: priorProbs}));
if ("label" in utterance) {
var truthValue = lexicon(utterance, obj);
condition(truthValue)
}
if (utterance.point) {
var truthValuePoint = point(utterance, obj);
condition(truthValuePoint)
}
return obj.shape
}})
})
var speaker = cache(function(obj, lexiconName, priorProbs, speakerOptimality){
Infer({method: "enumerate", model: function(){
var utterance = utterancePrior(obj, lexiconName);
var L0 = literalListener(utterance, priorProbs);
factor(speakerOptimality * L0.score(obj.shape))
return utterance
}})
})
var pragmaticListener = cache(function(utterance, priorProbs, speakerOptimality){
Infer({method: "enumerate", model: function(){
var lexiconName = sample(LexiconPrior);
var obj = sample( Categorical({vs: all_objects, ps: priorProbs}));
var S1 = speaker(obj, lexiconName, priorProbs, speakerOptimality);
observe(S1, utterance)
return obj.shape == "circle" ? 1 : 0
}})
})
var addNoise = function(dist, noiseParam){
Infer({model: function(){
return flip(noiseParam) ? uniformDraw([0, 1]) : sample(dist)
}
})
}
'
Sanity check for the priors on slope and intercept.
priorSoWebppl <- '
var infData = dataFromR
var priorProbs = [.5, .5, .5]
var speakerOptimality = []
var model = function(){
var so_slope = uniformDrift({a: -2, b: 2, width: 0.4})
var so_int = uniformDrift({a: -2, b: 2, width: 0.4})
map(function(row){
var age = row.age_num
var speakerOptimality = so_int + so_slope * (age - row.minage)
var rsaPredictions = pragmaticListener({label: "dax", point: 2 }, priorProbs, speakerOptimality)
//display(JSON.stringify(row.minage))
}, infData)
return extend({so_int: so_int, so_slope: so_slope})
}
'
so_prior <- readRDS("../saves/so_prior.rds")
# so_prior<- webppl(
# program_code = paste(rsaUtils, rsaModel, priorSoWebppl , sep='\n'),
# data = inf_data%>%mutate(minage = min(age_num)),
# data_var = "dataFromR",
# model_var = "model",
# chains = 1,
# inference_opts = list(method = "forward", samples = 1000, verbose = T)
# )
#saveRDS(so_prior, "../saves/so_prior.rds")
priorPred <- so_prior%>%
spread(Parameter, value)%>%
mutate(chain = factor(Chain))
so_prior <- so_prior%>%
mutate(chain = factor(Chain))
mean_so_pre <-priorPred%>%
summarise(int = mean(so_int),
slope = mean(so_slope))
so_prior_plot <- ggplot(data = priorPred) +
geom_abline(aes(intercept = so_int, slope = so_slope), col = "grey", alpha = 0.3)+
geom_abline(data = mean_so_pre, aes(intercept = int, slope = slope), size = 0.4)+
scale_x_continuous(limits=c(0, 2), name="age", breaks = c(0:2), labels = c(3:5)) +
scale_y_continuous(limits=c(-4, 4), name="Speaker optimality")+
ggtitle("Model speaker optimality prior")+
coord_fixed(ratio=1/4)+
theme_few()
priorPreWebppl <- '
var prefData = dataFromR
var priorProbs = []
var logistic = function(x) {1 / (1 + Math.exp(-x))}
var model = function(){
var pref_slope = uniformDrift({a: -2, b: 2, width: 0.4})
var pref_int = uniformDrift({a: -2, b: 2, width: 0.4})
map(function(row){
var age = row.age_num
var priorReg = logistic(pref_int + pref_slope * (age - row.minage))
var priorProbs= [1-priorReg, priorReg]
var priorPredictions = Infer({method: "enumerate", model: function(){
var obj = sample( Categorical({vs: prior_objects, ps: priorProbs}));
return obj.shape == "circle" ? 1 : 0
}})
// observe(priorPredictions, row.correct)
// display(JSON.stringify(priorReg))
}, prefData)
return extend({pref_int: pref_int, pref_slope: pref_slope})
}
'
pref_prior_con <- readRDS("../saves/pref_prior_con.rds")
# pref_prior_con<- webppl(
# program_code = paste(rsaUtils, rsaModel, priorPrefConWebppl , sep='\n'),
# data = pref_data%>%mutate(minage = min(age_num)),
# data_var = "dataFromR",
# model_var = "model",
# chains = 1,
# inference_opts = list(method = "forward", samples = 1000, verbose = T)
# )
# saveRDS(pref_prior_con, "../saves/pref_prior_con.rds")
prefPriorPredCon <- pref_prior_con%>%
spread(Parameter, value)
pref_prior_con <- pref_prior_con%>%
mutate(chain = factor(Chain),
alignment = "congruent")
x_plot <- seq(0, 2, by = 0.1)
model_predictions <- sapply(1:length(prefPriorPredCon$pref_int), function(idx) {
plogis(prefPriorPredCon$pref_int[idx] + prefPriorPredCon$pref_slope[idx] * x_plot)
})
colnames(model_predictions) <- 1:length(prefPriorPredCon$pref_int)
plot_pref_prior_con <- as.data.frame(cbind(x_plot, model_predictions))
plot_pref_prior_con <- melt(plot_pref_prior_con, id.vars = "x_plot", variable.name = "iteration",
value.name = "y_plot")
plot_prior_con_mean <- plot_pref_prior_con%>%
group_by(x_plot)%>%
summarise(y_plot = mean(y_plot))
pref_prior_plot <- ggplot(plot_pref_prior_con) +
geom_line(aes(x_plot, y_plot, group = iteration), col = "grey",alpha = 0.2) +
#geom_ribbon(data = plot_prior_mean, aes(x =x_plot, ymin = ci_lower, ymax = ci_upper), fill = "red", alpha = 0.5) +
geom_line(data = plot_prior_con_mean, aes(x_plot, y_plot), col = "black", size = 0.4) +
xlab("age") + ylab("Proportion Expected Choice") +
scale_x_continuous(limits=c(0,2), name="age", breaks = c(0:2), labels = c(3:5)) +
scale_y_continuous(limits = c(0, 1), name="P in favor of preferred object")+
ggtitle("Model preference prior")+
coord_fixed(ratio=2)+
theme_few()
There is no information in the priors. All structure in the models further down comes from the data
modelPredWebppl <- '
var levels = function(df, label){
return _.uniq(_.map(df, label));
}
var prefData = _.filter(dataFromR, {experiment: "preference_pretest"})
var infData = _.filter(dataFromR, {experiment: "informativeness_pretest"})
// make finer grained as needed
var binned_age_range = _.range(0, 2, 0.05)
var all_conditions = levels(prefData, "condition")
var foreach = function(fn, lst) {
var foreach_ = function(i) {
if (i < lst.length) {
fn(lst[i]);
foreach_(i + 1);
}
};
foreach_(0);
};
var logistic = function(x) {1 / (1 + Math.exp(-x))}
var model = function(){
var so_slope = uniformDrift({a: -2, b: 2, width: 0.4})
var so_int = uniformDrift({a: -2, b: 2, width: 0.4})
foreach(function(row){
var age = row.age_num
var speakerOptimality = so_int + so_slope * (age - infData[0].minage)
var inf_priorProbs = [.5, .5, .5]
var rsaPredictions = pragmaticListener({label: "dax", point: 2 },
inf_priorProbs, speakerOptimality)
observe(rsaPredictions, row.correct)
}, infData)
var pref_params = map(function(cndtn){
var conditionData = _.filter(prefData, {condition: cndtn})
var pref_slope = uniformDrift({a: -2, b: 2, width: 0.4})
var pref_int = uniformDrift({a: -2, b: 2, width: 0.4})
foreach(function(row){
var age = row.age_num
var priorReg = logistic(pref_int + pref_slope * (age - row.minage))
var priorProbs= [1-priorReg, priorReg]
var priorPredictions = Infer({method: "enumerate", model: function(){
var obj = sample( Categorical({vs: prior_objects, ps: priorProbs}));
return obj.shape == "circle" ? 1 : 0
}})
observe(priorPredictions, row.correct)
}, conditionData)
return {pref_int, pref_slope, condition: cndtn}
}, all_conditions)
var diff_speaker_params = _.filter(pref_params,
{condition: "different_speaker"})[0]
var same_speaker_params = _.filter(pref_params,
{condition: "same_speaker"})[0]
var predictions_by_age = map(function(age_bin){
var speakerOptimality = so_int + so_slope * age_bin
var diff_priorReg = logistic(diff_speaker_params.pref_int +
diff_speaker_params.pref_slope * age_bin)
var diff_priorProbs_congruent = [1-diff_priorReg, 1-diff_priorReg, diff_priorReg]
var diff_priorProbs_incongruent = [diff_priorReg, diff_priorReg, 1-diff_priorReg]
var rsaPredictions_diff_cong = expectation(pragmaticListener({label: "dax", point: 2 }, diff_priorProbs_congruent, speakerOptimality))
var rsaPredictions_diff_incong = expectation(pragmaticListener({label: "dax", point: 2 }, diff_priorProbs_incongruent, speakerOptimality))
var same_priorReg = logistic(same_speaker_params.pref_int +
same_speaker_params.pref_slope * age_bin)
var same_priorProbs_congruent = [1-same_priorReg, 1-same_priorReg, same_priorReg]
var same_priorProbs_incongruent = [same_priorReg, same_priorReg, 1-same_priorReg]
var rsaPredictions_same_cong = expectation(pragmaticListener({label: "dax", point: 2 }, same_priorProbs_congruent, speakerOptimality))
var rsaPredictions_same_incong = expectation(pragmaticListener({label: "dax", point: 2 }, same_priorProbs_incongruent, speakerOptimality))
return extend(
// diff_speaker_params, same_speaker_params,
{
diff_priorReg,
same_priorReg,
speakerOptimality,
rsaPredictions_diff_cong,
rsaPredictions_diff_incong,
rsaPredictions_same_cong,
rsaPredictions_same_incong,
age_bin,
so_slope,
so_int,
})
}, binned_age_range)
return predictions_by_age
}
'
Taking samples and munging the data.
model_pred <- readRDS("../saves/kids_model_predictions.rds")
# model_pred<- webppl(
# program_code = paste(rsaUtils, rsaModel, modelPredWebppl , sep='\n'),
# data = pre_data,
# data_var = "dataFromR",
# model_var = "model",
# chains = 4,
# cores = 4,
# inference_opts = list(method = "MCMC", samples = 900, burn = 200, verbose = T)
# )
#saveRDS(model_pred, file = "../saves/kids_model_predictions.rds")
kids_model_pred <- model_pred%>%
select(value) %>%
map_df(bind_rows)%>%
mutate(iteration = rep(1:3600,each = 40))%>%
select(-diff_priorReg,-same_priorReg,-speakerOptimality,-so_slope,-so_int)%>%
gather(condition, prop_informative, -iteration,-age_bin)%>%
separate(condition, into = c("model", "Speaker", "Alignment"), sep="_")%>%
mutate(Speaker = ifelse(Speaker == "diff", "different_speaker","same_speaker"),
Alignment = ifelse(Alignment == "cong", "congruent","incongruent"))
kids_model_pred_so <- model_pred%>%
select(value) %>%
map_df(bind_rows)%>%
mutate(iteration = rep(1:3600,each = 40))%>%
select(so_slope,so_int,age_bin,iteration,speakerOptimality)
kids_model_pred_prior <- model_pred%>%
select(value) %>%
map_df(bind_rows)%>%
mutate(iteration = rep(1:3600,each = 40))%>%
select(age_bin,iteration,diff_priorReg,same_priorReg)%>%
gather(condition, prop_preferred, -iteration,-age_bin)%>%
mutate(condition = ifelse(condition == "diff_priorReg", "different_speaker","same_speaker"))
Here we estimate the speaker optimality parameter that best captures the data for the different ages. The model based estimate nicely captures the developmental trend.
The model accurately captures the the difference between conditions as well as the developmental trend we see in the data.
These are the predictions from the RSA model which takes in the age specific speaker optimality parameter and the age specific priors. The priors are converted from 2-object world into 3-object world before going into the model. This corresponds to the way we handled the priors for adults.
priorOnlyWebppl <- '
var levels = function(df, label){
return _.uniq(_.map(df, label));
}
var prefData = _.filter(dataFromR, {experiment: "preference_pretest"})
// make finer grained as needed
var binned_age_range = _.range(0, 2, 0.05)
var all_conditions = levels(prefData, "condition")
var foreach = function(fn, lst) {
var foreach_ = function(i) {
if (i < lst.length) {
fn(lst[i]);
foreach_(i + 1);
}
};
foreach_(0);
};
var logistic = function(x) {1 / (1 + Math.exp(-x))}
var model = function(){
var pref_params = map(function(cndtn){
var conditionData = _.filter(prefData, {condition: cndtn})
var pref_slope = uniformDrift({a: -2, b: 2, width: 0.4})
var pref_int = uniformDrift({a: -2, b: 2, width: 0.4})
foreach(function(row){
var age = row.age_num
var priorReg = logistic(pref_int + pref_slope * (age - row.minage))
var priorProbs= [1-priorReg, priorReg]
var priorPredictions = Infer({method: "enumerate", model: function(){
var obj = sample( Categorical({vs: prior_objects, ps: priorProbs}));
return obj.shape == "circle" ? 1 : 0
}})
observe(priorPredictions, row.correct)
}, conditionData)
return {pref_int, pref_slope, condition: cndtn}
}, all_conditions)
var diff_speaker_params = _.filter(pref_params,
{condition: "different_speaker"})[0]
var same_speaker_params = _.filter(pref_params,
{condition: "same_speaker"})[0]
var predictions_by_age = map(function(age_bin){
var diff_priorReg = logistic(diff_speaker_params.pref_int +
diff_speaker_params.pref_slope * age_bin)
var diff_priorProbs_congruent = [1-diff_priorReg, 1-diff_priorReg, diff_priorReg]
var diff_priorProbs_incongruent = [diff_priorReg, diff_priorReg, 1-diff_priorReg]
var same_priorReg = logistic(same_speaker_params.pref_int +
same_speaker_params.pref_slope * age_bin)
var same_priorProbs_congruent = [1-same_priorReg, 1-same_priorReg, same_priorReg]
var same_priorProbs_incongruent = [same_priorReg, same_priorReg, 1-same_priorReg]
var priorPredictions_diff_con = Infer({method: "enumerate", model: function(){
var obj = sample( Categorical({vs: all_objects, ps: diff_priorProbs_congruent}));
return obj.shape == "circle" ? 1 : 0
}})
var priorPredictions_diff_incon = Infer({method: "enumerate", model: function(){
var obj = sample( Categorical({vs: all_objects, ps: diff_priorProbs_incongruent}));
return obj.shape == "circle" ? 1 : 0
}})
var priorPredictions_same_con = Infer({method: "enumerate", model: function(){
var obj = sample( Categorical({vs: all_objects, ps: same_priorProbs_congruent}));
return obj.shape == "circle" ? 1 : 0
}})
var priorPredictions_same_incon = Infer({method: "enumerate", model: function(){
var obj = sample( Categorical({vs: all_objects, ps: same_priorProbs_incongruent}));
return obj.shape == "circle" ? 1 : 0
}})
var prior_diff_con = Math.exp(priorPredictions_diff_con.score(1))
var prior_diff_incon = Math.exp(priorPredictions_diff_incon.score(1))
var prior_same_con = Math.exp(priorPredictions_same_con.score(1))
var prior_same_incon = Math.exp(priorPredictions_same_incon.score(1))
return extend(
{prior_diff_con,
prior_diff_incon,
prior_same_con,
prior_same_incon,
age_bin})
}, binned_age_range)
return predictions_by_age
}
'
prior_only_predictions <- readRDS("../saves/prior_only_predictions.rds")
# prior_only_predictions<- webppl(
# program_code = paste(rsaUtils, rsaModel, priorOnlyWebppl , sep='\n'),
# data = pre_data,
# data_var = "dataFromR",
# model_var = "model",
# chains = 3,
# cores = 3,
# inference_opts = list(method = "MCMC", samples = 1000, burn = 200, verbose = T)
# )
#saveRDS(prior_only_predictions, "../saves/prior_only_predictions.rds")
prior_only_model <- prior_only_predictions %>%
select(value) %>%
map_df(bind_rows)%>%
mutate(iteration = rep(1:3000,each = 40))%>%
gather(condition, prop_informative, -iteration,-age_bin)%>%
separate(condition, into = c("model", "Speaker", "Alignment"), sep="_")%>%
mutate(Speaker = ifelse(Speaker == "diff", "different_speaker","same_speaker"),
Alignment = ifelse(Alignment == "con", "congruent","incongruent"))
model_comp_mean_ci <- bind_rows(
plot_model_pred%>%ungroup()%>%mutate(model = "RSA"),
plot_prior_only_pred%>%ungroup()%>%mutate(model = "Prior Only"),
no_prior_plot%>%ungroup()%>%mutate(model = "No Prior")
)
model_comp <- bind_rows(
kids_model_pred %>%mutate(model = "RSA"),
prior_only_model%>%mutate(model = "Prior Only"),
no_prior_pred%>%select(-so_slope,-so_int,-speakerOptimality)%>%mutate(model = "No Prior")
)